Load the data from the two years and bind them to create a single data frame:
# Load each cohort's responses and tag it with its year in one step
eth_2018 <- read_csv("data/2018-q32/all-2018.csv") %>%
  mutate(year = 2018)
eth_2019 <- read_csv("data/2019-q32/all-2019.csv") %>%
  mutate(year = 2019)
# Stack the two cohorts (2019 first) and move year to the front
eth <- bind_rows(eth_2019, eth_2018) %>%
  relocate(year)
Skim an overview of the data:
skim(eth) %>% kable()
| skim_type | skim_variable | n_missing | complete_rate | numeric.mean | numeric.sd | numeric.p0 | numeric.p25 | numeric.p50 | numeric.p75 | numeric.p100 | numeric.hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| numeric | year | 0 | 1 | 2018.5331020 | 0.4989697 | 2018 | 2018 | 2019 | 2019 | 2019 | ▇▁▁▁▇ |
| numeric | A1 | 0 | 1 | 0.9543513 | 0.2087499 | 0 | 1 | 1 | 1 | 1 | ▁▁▁▁▇ |
| numeric | A2 | 0 | 1 | 0.6553657 | 0.4753123 | 0 | 0 | 1 | 1 | 1 | ▅▁▁▁▇ |
| numeric | A3 | 0 | 1 | 0.6334757 | 0.4819193 | 0 | 0 | 1 | 1 | 1 | ▅▁▁▁▇ |
| numeric | A4 | 0 | 1 | 0.4911906 | 0.4999891 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A5 | 0 | 1 | 0.7394554 | 0.4389904 | 0 | 0 | 1 | 1 | 1 | ▃▁▁▁▇ |
| numeric | A6 | 0 | 1 | 0.6970101 | 0.4596122 | 0 | 0 | 1 | 1 | 1 | ▃▁▁▁▇ |
| numeric | A7 | 0 | 1 | 0.5104111 | 0.4999583 | 0 | 0 | 1 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A8 | 0 | 1 | 0.4586225 | 0.4983515 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A9 | 0 | 1 | 0.5475174 | 0.4978034 | 0 | 0 | 1 | 1 | 1 | ▆▁▁▁▇ |
| numeric | A10 | 0 | 1 | 0.5803524 | 0.4935671 | 0 | 0 | 1 | 1 | 1 | ▆▁▁▁▇ |
| numeric | A11 | 0 | 1 | 0.6348105 | 0.4815475 | 0 | 0 | 1 | 1 | 1 | ▅▁▁▁▇ |
| numeric | A12 | 0 | 1 | 0.5800854 | 0.4936105 | 0 | 0 | 1 | 1 | 1 | ▆▁▁▁▇ |
| numeric | A13 | 0 | 1 | 0.4671650 | 0.4989873 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A14 | 0 | 1 | 0.2020822 | 0.4016068 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▂ |
| numeric | A15 | 0 | 1 | 0.5995729 | 0.4900504 | 0 | 0 | 1 | 1 | 1 | ▆▁▁▁▇ |
| numeric | A16 | 0 | 1 | 0.3166044 | 0.4652137 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▃ |
| numeric | A17 | 0 | 1 | 0.7303791 | 0.4438221 | 0 | 0 | 1 | 1 | 1 | ▃▁▁▁▇ |
| numeric | A18 | 0 | 1 | 0.5301655 | 0.4991558 | 0 | 0 | 1 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A19 | 0 | 1 | 0.5165510 | 0.4997927 | 0 | 0 | 1 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A20 | 0 | 1 | 0.4249867 | 0.4944070 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▆ |
| numeric | A21 | 0 | 1 | 0.5864923 | 0.4925280 | 0 | 0 | 1 | 1 | 1 | ▆▁▁▁▇ |
| numeric | A22 | 0 | 1 | 0.8045916 | 0.3965677 | 0 | 1 | 1 | 1 | 1 | ▂▁▁▁▇ |
| numeric | A23 | 0 | 1 | 0.4113721 | 0.4921481 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▆ |
| numeric | A24 | 0 | 1 | 0.4535505 | 0.4979042 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▆ |
| numeric | A25 | 0 | 1 | 0.5106781 | 0.4999527 | 0 | 0 | 1 | 1 | 1 | ▇▁▁▁▇ |
| numeric | A26 | 0 | 1 | 0.6276028 | 0.4835080 | 0 | 0 | 1 | 1 | 1 | ▅▁▁▁▇ |
| numeric | A27 | 0 | 1 | 0.3836092 | 0.4863294 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▅ |
| numeric | A28 | 0 | 1 | 0.2020822 | 0.4016068 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▂ |
| numeric | A29 | 0 | 1 | 0.7653497 | 0.4238366 | 0 | 1 | 1 | 1 | 1 | ▂▁▁▁▇ |
| numeric | A30 | 0 | 1 | 0.6097170 | 0.4878788 | 0 | 0 | 1 | 1 | 1 | ▅▁▁▁▇ |
| numeric | A31 | 0 | 1 | 0.3392952 | 0.4735334 | 0 | 0 | 0 | 1 | 1 | ▇▁▁▁▅ |
| numeric | A32 | 0 | 1 | 0.2127603 | 0.4093141 | 0 | 0 | 0 | 0 | 1 | ▇▁▁▁▂ |
We can fit a Multidimensional Item Response Theory (mirt) model. From the function definition:
mirt fits a maximum likelihood (or maximum a posteriori) factor analysis model to any mixture of dichotomous and polytomous data under the item response theory paradigm using either Cai's (2010) Metropolis-Hastings Robbins-Monro (MHRM) algorithm.
The process is to first fit the model, and save the result as a model object that we can then parse to get tabular or visual displays of the model that we might want. When fitting the model, we have the option to specify a few arguments, which then get interpreted as parameters to be passed to the model.
fit_2pl <- mirt(
data = eth[2:33], # all but the year column
model = 1, # number of factors to extract
itemtype = "2PL", # 2 parameter logistic model
SE = TRUE # estimate standard errors
)
##
Iteration: 1, Log-Lik: -64759.178, Max-Change: 0.75785
Iteration: 2, Log-Lik: -63677.214, Max-Change: 0.26098
Iteration: 3, Log-Lik: -63526.153, Max-Change: 0.18006
Iteration: 4, Log-Lik: -63459.216, Max-Change: 0.11373
Iteration: 5, Log-Lik: -63424.801, Max-Change: 0.07319
Iteration: 6, Log-Lik: -63406.568, Max-Change: 0.06009
Iteration: 7, Log-Lik: -63395.715, Max-Change: 0.04387
Iteration: 8, Log-Lik: -63389.518, Max-Change: 0.03218
Iteration: 9, Log-Lik: -63385.913, Max-Change: 0.02534
Iteration: 10, Log-Lik: -63380.849, Max-Change: 0.00537
Iteration: 11, Log-Lik: -63380.732, Max-Change: 0.00437
Iteration: 12, Log-Lik: -63380.657, Max-Change: 0.00288
Iteration: 13, Log-Lik: -63380.552, Max-Change: 0.00220
Iteration: 14, Log-Lik: -63380.538, Max-Change: 0.00150
Iteration: 15, Log-Lik: -63380.529, Max-Change: 0.00120
Iteration: 16, Log-Lik: -63380.507, Max-Change: 0.00054
Iteration: 17, Log-Lik: -63380.505, Max-Change: 0.00035
Iteration: 18, Log-Lik: -63380.503, Max-Change: 0.00036
Iteration: 19, Log-Lik: -63380.499, Max-Change: 0.00020
Iteration: 20, Log-Lik: -63380.498, Max-Change: 0.00020
Iteration: 21, Log-Lik: -63380.498, Max-Change: 0.00019
Iteration: 22, Log-Lik: -63380.496, Max-Change: 0.00015
Iteration: 23, Log-Lik: -63380.496, Max-Change: 0.00013
Iteration: 24, Log-Lik: -63380.496, Max-Change: 0.00013
Iteration: 25, Log-Lik: -63380.496, Max-Change: 0.00008
##
## Calculating information matrix...
We then compute factor score estimates and augment the existing data frame with these estimates, to keep everything in one place. To do the estimation, we use the fscores() function from the mirt package which takes in a computed model object and computes factor score estimates according to the method specified. We will use the EAP method for factor score estimation, which is the “expected a-posteriori” method, the default. We specify it explicitly below, but the results would have been the same if we omitted specifying the method argument since it’s the default method the function uses.
# Augment the data with EAP factor-score estimates from the fitted model.
# fscores() returns an N x 1 matrix; flatten it so F1 is a regular numeric
# column rather than a matrix-column (matrix-columns print oddly and can
# trip up some dplyr/ggplot operations).
eth <- eth %>%
  mutate(F1 = as.vector(fscores(fit_2pl, method = "EAP")))
We can also calculate the model coefficient estimates using a generic function coef() which is used to extract model coefficients from objects returned by modeling functions. We will set the IRTpars argument to TRUE, which means slope intercept parameters will be converted into traditional IRT parameters.
coefs_2pl <- coef(fit_2pl, IRTpars = TRUE)
The resulting object coefs_2pl is a list, with one element for each question, and an additional GroupPars element that we won’t be using. The output is a bit long, so we’re only showing a few of the elements here:
coefs_2pl[1:3]
## $A1
## a b g u
## par 1.333600 -2.839314 0 1
## CI_2.5 1.132843 -3.145887 NA NA
## CI_97.5 1.534358 -2.532741 NA NA
##
## $A2
## a b g u
## par 1.266684 -0.6609272 0 1
## CI_2.5 1.156503 -0.7388837 NA NA
## CI_97.5 1.376865 -0.5829707 NA NA
##
## $A3
## a b g u
## par 1.091594 -0.6196620 0 1
## CI_2.5 0.991478 -0.7043376 NA NA
## CI_97.5 1.191711 -0.5349865 NA NA
# coefs_2pl[35:37]
Let’s take a closer look at the first element:
coefs_2pl[1]
## $A1
## a b g u
## par 1.333600 -2.839314 0 1
## CI_2.5 1.132843 -3.145887 NA NA
## CI_97.5 1.534358 -2.532741 NA NA
In this output:
a is discrimination and b is difficulty. To make this output a little more user friendly, we should tidy it such that we have a row per question. We’ll do this in two steps. First, write a function that tidies the output for one question, i.e. one list element. Then, map this function over the list of all questions, resulting in a data frame.
# Tidy one element of the coef() output (one item's parameter matrix) into
# a single-row data frame with columns a_est, a_CI_2.5, a_CI_97.5, b_est,
# b_CI_2.5, b_CI_97.5.
# NOTE(review): this relies on melt() labelling the row/column dimensions
# X1/X2 — confirm which package's melt() is attached, since reshape2's
# melt() on a plain matrix names them Var1/Var2 instead.
tidy_mirt_coefs <- function(x){
x %>%
# melt the list element
melt() %>%
# convert to a tibble
as_tibble() %>%
# convert factors to characters
mutate(across(where(is.factor), as.character)) %>%
# only focus on rows where X2 is a or b (discrimination or difficulty)
filter(X2 %in% c("a", "b")) %>%
# in X1, relabel par (parameter) as est (estimate)
mutate(X1 = if_else(X1 == "par", "est", X1)) %>%
# unite columns X2 and X1 into a new column called var separated by _
unite(X2, X1, col = "var", sep = "_") %>%
# turn into a wider data frame
pivot_wider(names_from = var, values_from = value)
}
Let’s see what this does to a single element in coefs_2pl:
tidy_mirt_coefs(coefs_2pl[1])
## # A tibble: 1 x 7
## L1 a_est a_CI_2.5 a_CI_97.5 b_est b_CI_2.5 b_CI_97.5
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1 1.33 1.13 1.53 -2.84 -3.15 -2.53
And now let’s map it over all 32 item elements of coefs_2pl:
tidy_2pl <- map_dfr(coefs_2pl[1:32], tidy_mirt_coefs, .id = "Question")
A quick peek at the result:
tidy_2pl
## # A tibble: 32 x 7
## Question a_est a_CI_2.5 a_CI_97.5 b_est b_CI_2.5 b_CI_97.5
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1 1.33 1.13 1.53 -2.84 -3.15 -2.53
## 2 A2 1.27 1.16 1.38 -0.661 -0.739 -0.583
## 3 A3 1.09 0.991 1.19 -0.620 -0.704 -0.535
## 4 A4 1.46 1.34 1.57 0.0357 -0.0255 0.0968
## 5 A5 1.45 1.33 1.58 -0.984 -1.07 -0.900
## 6 A6 1.30 1.19 1.41 -0.842 -0.925 -0.758
## 7 A7 1.06 0.965 1.16 -0.0470 -0.121 0.0273
## 8 A8 1.98 1.83 2.13 0.140 0.0866 0.193
## 9 A9 1.71 1.57 1.84 -0.166 -0.222 -0.109
## 10 A10 1.64 1.51 1.77 -0.289 -0.348 -0.230
## # … with 22 more rows
And a nicely formatted table of the result:
# Formatted table of the tidied 2PL estimates: numbers rounded to 3 dp,
# discrimination (a_*) columns shaded green, difficulty (b_*) columns
# shaded blue, with spanner headings grouping estimate + CI columns.
# NOTE(review): data_color(colors = ...) is deprecated in recent gt
# releases in favour of `palette`/`fn` — confirm the installed version.
gt(tidy_2pl) %>%
fmt_number(columns = contains("_"), decimals = 3) %>%
data_color(
columns = contains("a_"),
colors = scales::col_numeric(palette = c("Greens"), domain = NULL)
) %>%
data_color(
columns = contains("b_"),
colors = scales::col_numeric(palette = c("Blues"), domain = NULL)
) %>%
tab_spanner(label = "Discrimination", columns = contains("a_")) %>%
tab_spanner(label = "Difficulty", columns = contains("b_")) %>%
cols_label(
a_est = "Est.",
b_est = "Est.",
a_CI_2.5 = "2.5%",
b_CI_2.5 = "2.5%",
a_CI_97.5 = "97.5%",
b_CI_97.5 = "97.5%"
)
Do students from different programmes of study have different distributions of ability?
# Produce a vector of all the relevant file names
# (match only the courses beginning 401, i.e. exclude the "all-*.csv" files).
# Escape the dot and anchor the extension so "." can't match any character.
files <- dir_ls("data/", recurse = TRUE, regexp = "401.*\\.csv$")
# Read all files and add a column called file_path to identify them
eth_entry_test <- vroom(files, id = "file_path")
# Parse the file_path column into year and class columns, e.g.
# "data/2019-q32/401-xyz-2019.csv" -> year = "2019", class = "401-xyz"
eth_entry_test <- eth_entry_test %>%
  mutate(
    file_path = file_path %>%
      str_remove("^data/") %>%      # leading directory
      str_remove("-q32") %>%        # quarter suffix on the year folder
      str_remove("\\.csv$")         # extension (escaped, anchored)
  ) %>%
  separate(file_path, c("year", "class"), sep = "/") %>%
  mutate(class = str_remove(class, "-2019"))
# fit IRT model
# Fit a unidimensional 2PL IRT model to the pooled entry-test responses,
# with standard errors for the item parameter estimates
fit_2pl <- mirt(
  select(eth_entry_test, -year, -class), # item responses only
  model = 1,        # single latent factor
  itemtype = "2PL", # two-parameter logistic items
  SE = TRUE         # estimate standard errors
)
##
Iteration: 1, Log-Lik: -68293.892, Max-Change: 0.77657
Iteration: 2, Log-Lik: -67183.611, Max-Change: 0.26146
Iteration: 3, Log-Lik: -67031.612, Max-Change: 0.15439
Iteration: 4, Log-Lik: -66967.557, Max-Change: 0.12756
Iteration: 5, Log-Lik: -66935.528, Max-Change: 0.08196
Iteration: 6, Log-Lik: -66918.161, Max-Change: 0.05595
Iteration: 7, Log-Lik: -66908.485, Max-Change: 0.04141
Iteration: 8, Log-Lik: -66902.913, Max-Change: 0.03006
Iteration: 9, Log-Lik: -66899.690, Max-Change: 0.02325
Iteration: 10, Log-Lik: -66895.249, Max-Change: 0.00515
Iteration: 11, Log-Lik: -66895.136, Max-Change: 0.00432
Iteration: 12, Log-Lik: -66895.062, Max-Change: 0.00313
Iteration: 13, Log-Lik: -66894.934, Max-Change: 0.00219
Iteration: 14, Log-Lik: -66894.918, Max-Change: 0.00090
Iteration: 15, Log-Lik: -66894.911, Max-Change: 0.00075
Iteration: 16, Log-Lik: -66894.892, Max-Change: 0.00031
Iteration: 17, Log-Lik: -66894.891, Max-Change: 0.00036
Iteration: 18, Log-Lik: -66894.890, Max-Change: 0.00032
Iteration: 19, Log-Lik: -66894.886, Max-Change: 0.00020
Iteration: 20, Log-Lik: -66894.886, Max-Change: 0.00018
Iteration: 21, Log-Lik: -66894.886, Max-Change: 0.00018
Iteration: 22, Log-Lik: -66894.885, Max-Change: 0.00014
Iteration: 23, Log-Lik: -66894.885, Max-Change: 0.00013
Iteration: 24, Log-Lik: -66894.885, Max-Change: 0.00012
Iteration: 25, Log-Lik: -66894.885, Max-Change: 0.00010
##
## Calculating information matrix...
# Augment the data with EAP factor-score estimates from the model fit.
# fscores() returns an N x 1 matrix; flatten it so F1 is a regular numeric
# column rather than a matrix-column.
eth_entry_test <- eth_entry_test %>%
  mutate(F1 = as.vector(fscores(fit_2pl, method = "EAP")))
Compare the distribution of abilities in the two years.
# Overlaid ability densities, one curve per test year
eth_entry_test %>%
  ggplot(aes(x = F1, fill = factor(year), colour = factor(year))) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(-3.5, 3.5)) +
  labs(
    title = "Density plot",
    subtitle = "Ability grouped by year of taking the test",
    x = "Ability",
    y = "Density",
    fill = "Year",
    colour = "Year"
  ) +
  theme_minimal()
There does not seem to be a big difference between the two year groups, so we combine them in the following analysis.
Compare the distribution of abilities in the various classes.
# Ridgeline plot: ability distribution within each class
eth_entry_test %>%
  ggplot(aes(F1, class, colour = class, fill = class)) +
  geom_density_ridges(alpha = 0.5) +
  scale_x_continuous(limits = c(-3.5, 3.5)) +
  guides(fill = FALSE, colour = FALSE) + # class is already on the y axis
  labs(
    title = "Density plot",
    subtitle = "Ability grouped by class of taking the test",
    x = "Ability",
    y = "Class"
  ) +
  theme_minimal()
## Picking joint bandwidth of 0.239
# Put year first in each cohort so the item columns line up with eth
eth_2019 <- relocate(eth_2019, year)
eth_2018 <- relocate(eth_2018, year)
# 2PL fit on the 2019 cohort alone
fit_2pl_2019 <- mirt(
  data = select(eth_2019, -year), # item responses only
  model = 1,        # single latent factor
  itemtype = "2PL", # two-parameter logistic items
  SE = TRUE         # estimate standard errors
)
##
Iteration: 1, Log-Lik: -34659.618, Max-Change: 0.72752
Iteration: 2, Log-Lik: -34073.399, Max-Change: 0.31468
Iteration: 3, Log-Lik: -33988.068, Max-Change: 0.18029
Iteration: 4, Log-Lik: -33956.457, Max-Change: 0.09592
Iteration: 5, Log-Lik: -33940.005, Max-Change: 0.07839
Iteration: 6, Log-Lik: -33931.402, Max-Change: 0.05357
Iteration: 7, Log-Lik: -33926.561, Max-Change: 0.04641
Iteration: 8, Log-Lik: -33923.842, Max-Change: 0.03289
Iteration: 9, Log-Lik: -33922.227, Max-Change: 0.02725
Iteration: 10, Log-Lik: -33921.127, Max-Change: 0.01801
Iteration: 11, Log-Lik: -33920.605, Max-Change: 0.01363
Iteration: 12, Log-Lik: -33920.298, Max-Change: 0.01214
Iteration: 13, Log-Lik: -33919.909, Max-Change: 0.01085
Iteration: 14, Log-Lik: -33919.870, Max-Change: 0.00379
Iteration: 15, Log-Lik: -33919.851, Max-Change: 0.00251
Iteration: 16, Log-Lik: -33919.823, Max-Change: 0.00091
Iteration: 17, Log-Lik: -33919.820, Max-Change: 0.00064
Iteration: 18, Log-Lik: -33919.818, Max-Change: 0.00055
Iteration: 19, Log-Lik: -33919.813, Max-Change: 0.00023
Iteration: 20, Log-Lik: -33919.813, Max-Change: 0.00013
Iteration: 21, Log-Lik: -33919.813, Max-Change: 0.00013
Iteration: 22, Log-Lik: -33919.812, Max-Change: 0.00014
Iteration: 23, Log-Lik: -33919.812, Max-Change: 0.00012
Iteration: 24, Log-Lik: -33919.811, Max-Change: 0.00011
Iteration: 25, Log-Lik: -33919.811, Max-Change: 0.00008
##
## Calculating information matrix...
# 2PL fit on the 2018 cohort alone
fit_2pl_2018 <- mirt(
  data = select(eth_2018, -year), # item responses only
  model = 1,        # single latent factor
  itemtype = "2PL", # two-parameter logistic items
  SE = TRUE         # estimate standard errors
)
##
Iteration: 1, Log-Lik: -29923.734, Max-Change: 0.86625
Iteration: 2, Log-Lik: -29439.479, Max-Change: 0.37017
Iteration: 3, Log-Lik: -29354.165, Max-Change: 0.16329
Iteration: 4, Log-Lik: -29315.094, Max-Change: 0.12033
Iteration: 5, Log-Lik: -29294.868, Max-Change: 0.08713
Iteration: 6, Log-Lik: -29283.271, Max-Change: 0.09644
Iteration: 7, Log-Lik: -29276.508, Max-Change: 0.06389
Iteration: 8, Log-Lik: -29272.598, Max-Change: 0.04379
Iteration: 9, Log-Lik: -29270.229, Max-Change: 0.02973
Iteration: 10, Log-Lik: -29268.367, Max-Change: 0.03265
Iteration: 11, Log-Lik: -29267.570, Max-Change: 0.02199
Iteration: 12, Log-Lik: -29267.073, Max-Change: 0.01249
Iteration: 13, Log-Lik: -29266.546, Max-Change: 0.00900
Iteration: 14, Log-Lik: -29266.456, Max-Change: 0.00750
Iteration: 15, Log-Lik: -29266.400, Max-Change: 0.00517
Iteration: 16, Log-Lik: -29266.333, Max-Change: 0.00315
Iteration: 17, Log-Lik: -29266.320, Max-Change: 0.00193
Iteration: 18, Log-Lik: -29266.312, Max-Change: 0.00180
Iteration: 19, Log-Lik: -29266.294, Max-Change: 0.00078
Iteration: 20, Log-Lik: -29266.292, Max-Change: 0.00020
Iteration: 21, Log-Lik: -29266.292, Max-Change: 0.00016
Iteration: 22, Log-Lik: -29266.291, Max-Change: 0.00017
Iteration: 23, Log-Lik: -29266.291, Max-Change: 0.00016
Iteration: 24, Log-Lik: -29266.290, Max-Change: 0.00015
Iteration: 25, Log-Lik: -29266.289, Max-Change: 0.00012
Iteration: 26, Log-Lik: -29266.289, Max-Change: 0.00011
Iteration: 27, Log-Lik: -29266.289, Max-Change: 0.00011
Iteration: 28, Log-Lik: -29266.288, Max-Change: 0.00009
##
## Calculating information matrix...
# Test information curves for each year's model
plot(fit_2pl_2019, type = "info", main = "Test information, 2019")
# Fix: this plots the 2018 model, so the title must say 2018 (was "2019")
plot(fit_2pl_2018, type = "info", main = "Test information, 2018")
We can get individual item surface and information plots using the itemplot() function from the mirt package, e.g.
# Trace-line (item characteristic curve) plot for a single item
mirt::itemplot(
  fit_2pl_2019,
  item = 1,
  main = "2019 2PL\nTrace lines for item 1"
)
We can also get the plots for all trace lines, one facet per plot.
plot(fit_2pl_2019, type = "trace", auto.key = FALSE)
Or all of them overlaid in one plot.
plot(fit_2pl_2019, type = "trace", facet_items=FALSE)
An alternative approach is using ggplot2 and plotly to add interactivity to make it easier to identify items.
# Store the lattice object returned by plot() so we can harvest its data
plt <- plot(fit_2pl_2019, type = "trace", facet_items = FALSE)
# The plotted curves live in panel.args: x/y pairs for every item
panel <- plt$panel.args[[1]]
items <- colnames(eth_2019[2:33])
# Derive the theta-grid size per item from the data instead of
# hard-coding 200, so this survives a different grid resolution
pts_per_item <- length(panel$x) / length(items)
plt_data <- tibble(
  x = panel$x,
  y = panel$y,
  subscripts = panel$subscripts,
  item = rep(items, each = pts_per_item)
) %>%
  mutate(
    # numeric item order so A2 sorts before A10
    item_no = str_remove(item, "A") %>% as.numeric(),
    item = fct_reorder(item, item_no)
  )
head(plt_data)
## # A tibble: 6 x 5
## x y subscripts item item_no
## <dbl> <dbl> <int> <fct> <dbl>
## 1 -6 0.0222 1 A1 1
## 2 -5.94 0.0239 2 A1 1
## 3 -5.88 0.0258 3 A1 1
## 4 -5.82 0.0277 4 A1 1
## 5 -5.76 0.0299 5 A1 1
## 6 -5.70 0.0321 6 A1 1
# Interactive trace-line plot for 2019; hovering shows the item name
p_2019 <- ggplot(plt_data, aes(x, y, colour = item, text = item)) +
  geom_line() +
  labs(
    title = "2019 2PL - Trace lines",
    # plotmath expressions don't survive ggplotly, so plain text labels
    x = "theta",
    y = "P(theta)",
    colour = "Item"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
ggplotly(p_2019, tooltip = "text")
# Store the lattice object returned by plot() so we can harvest its data
plt <- plot(fit_2pl_2018, type = "trace", facet_items = FALSE)
# The plotted curves live in panel.args: x/y pairs for every item
panel <- plt$panel.args[[1]]
items <- colnames(eth_2018[2:33])
# Derive the theta-grid size per item from the data instead of
# hard-coding 200, so this survives a different grid resolution
pts_per_item <- length(panel$x) / length(items)
plt_data <- tibble(
  x = panel$x,
  y = panel$y,
  subscripts = panel$subscripts,
  item = rep(items, each = pts_per_item)
) %>%
  mutate(
    # numeric item order so A2 sorts before A10
    item_no = str_remove(item, "A") %>% as.numeric(),
    item = fct_reorder(item, item_no)
  )
head(plt_data)
## # A tibble: 6 x 5
## x y subscripts item item_no
## <dbl> <dbl> <int> <fct> <dbl>
## 1 -6 0.00937 1 A1 1
## 2 -5.94 0.0102 2 A1 1
## 3 -5.88 0.0111 3 A1 1
## 4 -5.82 0.0121 4 A1 1
## 5 -5.76 0.0131 5 A1 1
## 6 -5.70 0.0143 6 A1 1
# Interactive trace-line plot for 2018; hovering shows the item name
p_2018 <- ggplot(plt_data, aes(x, y, colour = item, text = item)) +
  geom_line() +
  labs(
    title = "2018 2PL - Trace lines",
    # plotmath expressions don't survive ggplotly, so plain text labels
    x = "theta",
    y = "P(theta)",
    colour = "Item"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
ggplotly(p_2018, tooltip = "text")
## Warning in dev_fun(file = tempfile(), width = width %||% 640, height = height
## %||% : partial argument match of 'file' to 'filename'
knitr::knit_exit()